Results¶

This notebook is for rendering figures based on the results of the trained models.

In [ ]:
import pandas as pd
import numpy as np
import plotly.io as pio
import plotly.graph_objs as go
import plotly.express as px
from plotly.offline import init_notebook_mode

# Initialize Plotly to work in Jupyter notebook
init_notebook_mode(connected=True)
pio.renderers.default = 'notebook'

Training¶

Loading Training Data¶

In [ ]:
data_directory = "./output/data/"
In [ ]:
# Read the per-episode training metrics for each architecture.
df_dqn = pd.read_csv(f"{data_directory}DQN/training_metrics.csv")
df_ddqn = pd.read_csv(f"{data_directory}dDQN/training_metrics.csv")
df_dueldqn = pd.read_csv(f"{data_directory}duelDQN/training_metrics.csv")

Performance Comparison: Learning Curves¶

Learning Curve Helper Functions¶

In [ ]:
def plot_total_reward_and_moving_average(df, window_size=10):
    """Plot each column of `df` as a moving-average reward curve with a
    shaded +/- 1 rolling-standard-deviation band on a single Plotly figure.

    Args:
        df: DataFrame with one total-reward-per-episode series per column
            (one column per model, as built by `combine_columns`).
        window_size: window length used for both the moving average and the
            rolling standard deviation.
    """
    fig = go.Figure()
    colors = px.colors.qualitative.Plotly  # Use Plotly's qualitative color palette
    
    # Loop through each column in the dataframe
    for idx, column in enumerate(df.columns):
        total_reward_per_episode = df[column].values
        
        # Calculate the moving average ('valid' convolution, so this is
        # window_size - 1 elements shorter than the raw series)
        moving_avg_rewards = moving_average(total_reward_per_episode, window_size)
        
        # Calculate the rolling standard deviation for the shaded area
        # (one std per moving-average point, over the same window)
        rolling_std = np.array([np.std(total_reward_per_episode[i:i + window_size]) for i in range(len(moving_avg_rewards))])
        lower_bound = moving_avg_rewards - rolling_std
        upper_bound = moving_avg_rewards + rolling_std

        # Padding for the moving average and bounds: edge-pad both ends so the
        # smoothed curves roughly re-align with the full episode axis.
        padding = (len(total_reward_per_episode) - len(moving_avg_rewards)) // 2
        moving_avg_rewards_padded = np.pad(moving_avg_rewards, (padding, padding), 'edge')
        lower_bound_padded = np.pad(lower_bound, (padding, padding), 'edge')
        upper_bound_padded = np.pad(upper_bound, (padding, padding), 'edge')

        # Adjusting the padded arrays to match total_reward_per_episode length:
        # when window_size - 1 is odd the symmetric padding above is one short,
        # so pad the tail (or trim) to get an exact length match.
        if len(moving_avg_rewards_padded) < len(total_reward_per_episode):
            diff = len(total_reward_per_episode) - len(moving_avg_rewards_padded)
            moving_avg_rewards_padded = np.pad(moving_avg_rewards_padded, (0, diff), 'edge')
            lower_bound_padded = np.pad(lower_bound_padded, (0, diff), 'edge')
            upper_bound_padded = np.pad(upper_bound_padded, (0, diff), 'edge')
        else:
            moving_avg_rewards_padded = moving_avg_rewards_padded[:len(total_reward_per_episode)]
            lower_bound_padded = lower_bound_padded[:len(total_reward_per_episode)]
            upper_bound_padded = upper_bound_padded[:len(total_reward_per_episode)]

        line_color = colors[idx % len(colors)]
        fill_color = lighter_color(line_color)

        # Add the moving average line
        fig.add_trace(go.Scatter(
            x=np.arange(len(total_reward_per_episode)),
            y=moving_avg_rewards_padded,
            mode='lines',
            line=dict(color=line_color),
            name=f'{column} Moving Average',
            legendgroup=f'{column}',  # Group line and shaded area
            showlegend=True
        ))

        # Add the shaded area for the std deviation (x runs forward along the
        # upper bound, then backward along the lower bound to close the polygon)
        fig.add_trace(go.Scatter(
            x=np.concatenate([np.arange(len(total_reward_per_episode)), np.arange(len(total_reward_per_episode))[::-1]]),
            y=np.concatenate([upper_bound_padded, lower_bound_padded[::-1]]),
            fill='toself',
            fillcolor=fill_color,
            line=dict(color='rgba(255, 255, 255, 0)'),
            name=f'{column} Std Dev',
            legendgroup=f'{column}', # Group with corresponding line
            showlegend=False,  # Do not show separate legend for shaded area  
        ))

    fig.update_layout(
        title='Learning Curve',
        xaxis_title='Episode',
        yaxis_title='Total Reward',
        width=1000,  # Set the width of the plot
        height=800,  # Set the height of the plot
        legend=dict(
            x=0.99,  # Position at 99% along the x-axis
            y=0.01,  # Position at 1% along the y-axis
            xanchor='right',  # Anchor legend box's right corner
            yanchor='bottom'  # Anchor legend box's bottom corner
        ),
        showlegend=True
    )

    pio.show(fig)

def moving_average(values, window):
    """Return the simple moving average of `values` over a sliding `window`.

    Uses a 'valid' convolution, so the result has len(values) - window + 1
    elements.
    """
    kernel = np.full(window, 1.0 / window)
    return np.convolve(values, kernel, 'valid')

def combine_columns(dfs, column_name='Total Reward'):
    """Collect one named column from each (DataFrame, label) pair into a
    single DataFrame.

    Each column is renamed to '<label>_<column_name>' so the models remain
    distinguishable after concatenation.
    """
    renamed = []
    for df, name in dfs:
        renamed.append(
            df[[column_name]].rename(columns={column_name: f'{name}_{column_name}'})
        )
    return pd.concat(renamed, axis=1)

def lighter_color(color, intensity=0.2):
    """Turn a hex color ('#RRGGBB' or '#RGB') into a translucent rgba() string.

    `intensity` becomes the alpha channel, giving a lighter fill color for
    shaded areas that matches the corresponding line color.
    """
    hex_digits = color.lstrip('#')
    step = len(hex_digits) // 3
    r, g, b = (
        int(hex_digits[i:i + step], 16)
        for i in range(0, len(hex_digits), step)
    )
    return 'rgba({}, {}, {}, {})'.format(r, g, b, intensity)
In [ ]:
dataframes = [(df_dqn, 'DQN'), (df_ddqn, 'dDQN'), (df_dueldqn, 'duelDQN')]

# Combine the Total Reward columns
combined_df = combine_columns(dataframes, column_name='Total Reward')

Figure: Learning Curve¶

In [ ]:
plot_total_reward_and_moving_average(combined_df, window_size=10)

Training Loss¶

Training loss curves

In [ ]:
def plot_columns(df, title, ytitle):
    """Plot a smoothed (window-5 moving average) line for every column of `df`.

    Args:
        df: DataFrame with one metric series per column (one per model/run).
        title: figure title.
        ytitle: y-axis label; also selects the legend position (reward curves
            anchor the legend lower than loss curves).
    """
    fig = go.Figure()

    # Add a trace for each column in the dataframe
    for column in df.columns:
        smoothed = moving_average(df[column], window=5)
        fig.add_trace(go.Scatter(
            # 'valid' convolution shortens the series by window-1 points;
            # truncate x so x and y have matching lengths.
            x=df.index[:len(smoothed)],
            y=smoothed,
            mode='lines',
            name=column,
            line=dict(width=2)
        ))

    # Single layout: only the legend's vertical anchor differs between reward
    # plots (legend low, y=0.1) and other metrics (y=0.7).
    legend_y = 0.1 if ytitle == "Total Reward" else 0.7
    fig.update_layout(
        title=title,
        xaxis_title='Episode',
        yaxis_title=ytitle,
        width=800,
        height=600,
        legend=dict(
            x=0.99,            # Position at 99% along the x-axis
            y=legend_y,        # Metric-dependent position along the y-axis
            xanchor='right',   # Anchor legend box's right corner
            yanchor='bottom'   # Anchor legend box's bottom corner
        ),
        showlegend=True
    )

    # Show plot
    fig.show()
In [ ]:
loss_df = combine_columns(dataframes, column_name='Average Loss')
In [ ]:
plot_columns(loss_df,'Average Loss for Different Models','Average Loss')

Comparison of Average Training Time¶

Average training time (over 50 episodes) of each architecture with identical settings.

In [ ]:
time_df = combine_columns(dataframes, column_name='Training Time')
time_df.mean()
Out[ ]:
DQN_Training Time        10.719233
dDQN_Training Time       10.697270
duelDQN_Training Time    24.309608
dtype: float64

Hyperparameters¶

Effects of hyperparameters on DQN model.

Epsilon Decay¶

In [ ]:
df_eps75 = pd.read_csv(data_directory+"DQN_eps-0.75/"+"training_metrics.csv")
df_eps9 = pd.read_csv(data_directory+"DQN_eps-0.9/"+"training_metrics.csv")
df_eps999 = pd.read_csv(data_directory+"DQN_eps-0.999/"+"training_metrics.csv")
In [ ]:
decay = [(df_eps75, 'eps=0.75'), (df_eps9, 'eps=0.9'), (df_dqn, 'eps=0.995'), (df_eps999, 'eps=0.999')]
decay_df = combine_columns(decay, column_name='Total Reward')
decay2_df = combine_columns(decay, column_name='Average Loss')
In [ ]:
plot_columns(decay_df,"Effect of Epsilon Decay","Total Reward")
plot_columns(decay2_df,"Effect of Epsilon Decay","Average Loss")

Discount Factor¶

In [ ]:
df_gamma75 = pd.read_csv(data_directory+"DQN_gamma-0.75/"+"training_metrics.csv")
df_gamma9 = pd.read_csv(data_directory+"DQN_gamma-0.9/"+"training_metrics.csv")
df_gamma5 = pd.read_csv(data_directory+"DQN_gamma-0.5/"+"training_metrics.csv")
In [ ]:
discount = [(df_gamma5, 'gamma=0.5'), (df_gamma75, 'gamma=0.75'), (df_gamma9, 'gamma=0.9'), (df_dqn, 'gamma=0.99')]
discount_df = combine_columns(discount, column_name='Total Reward')
discount2_df = combine_columns(discount, column_name='Average Loss')
In [ ]:
plot_columns(discount_df,"Effect of Discount Factor","Total Reward")
plot_columns(discount2_df,"Effect of Discount Factor","Average Loss")

Learning Rate¶

In [ ]:
df_lr01 = pd.read_csv(data_directory+"DQN_lr-0.01/"+"training_metrics.csv")
df_lr001 = pd.read_csv(data_directory+"DQN_lr-0.001/"+"training_metrics.csv")
df_lr00001 = pd.read_csv(data_directory+"DQN_lr-0.00001/"+"training_metrics.csv")
df_lr000001 = pd.read_csv(data_directory+"DQN_lr-0.000001/"+"training_metrics.csv")
In [ ]:
learning = [(df_lr01, 'lr=0.01'), (df_lr001, 'lr=0.001'), (df_dqn, 'lr=0.0001'), (df_lr00001, 'lr=0.00001'), (df_lr000001, 'lr=0.000001')]
lr_df = combine_columns(learning, column_name='Total Reward')
lr2_df = combine_columns(learning, column_name='Average Loss')
In [ ]:
plot_columns(lr_df,"Effect of Learning Rate","Total Reward")
plot_columns(lr2_df,"Effect of Learning Rate","Average Loss")

Evaluation¶

In [ ]:
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
cpu

Environment¶

We can load in our environment, this must match the training environment which had a state space of (100,100), 3 UAVs, and 20 users.

In [ ]:
from core.uav_env import UAVEnv
env = UAVEnv(num_users=20, num_uavs=3, area_size=(100, 100))
state_size = env.observation_space.shape[0] * env.observation_space.shape[1]
action_size = env.action_space.n

Visualize Environment¶

We can visualize the environment through the visualize_uav_positions function from ./visualization/visualize.py. This works on the initial state to N steps. Please note that it becomes laggy with a large number of time steps.

In [ ]:
from visualization.visualize import visualize_uav_positions
env.reset()
visualize_uav_positions(env.user_positions, env.uav_positions)

Configuration¶

Load the config file from training. The input is the specific architecture. The options are "DQN", "DoubleDQN", and "DuelingDQN".

In [ ]:
from configs.config import Config
config_dqn = Config("DQN")
config_ddqn = Config("DoubleDQN")
config_dueldqn = Config("DuelingDQN")

Here's an example of the configuration parameters.

In [ ]:
config_dqn.to_dict()
Out[ ]:
{'network': 'DQN',
 'num_episodes': 50,
 'batch_size': 64,
 'gamma': 0.75,
 'learning_rate': 1e-05,
 'epsilon_start': 1.0,
 'epsilon_end': 0.1,
 'epsilon_decay': 0.75,
 'target_update': 10,
 'checkpoint_interval': 10,
 'evaluation_interval': 5,
 'replay_buffer_capacity': 5000}

Load Trained Policy¶

We can load the trained models.

In [ ]:
# Load the best checkpoints for each architecture.
# NOTE(review): torch.load unpickles arbitrary objects — only load checkpoints
# from trusted sources. map_location=device lets GPU-trained checkpoints load
# on a CPU-only machine.
dqn_model = torch.load(data_directory+"DQN"+'/best_model/best_model.pth.tar', map_location=device)
ddqn_model = torch.load(data_directory+"dDQN"+'/best_model/best_model.pth.tar', map_location=device)
dueldqn_model = torch.load(data_directory+"duelDQN"+'/best_model/best_model.pth.tar', map_location=device)

We can set up the policy nets.

In [ ]:
from core.dqn import DQN, DuelingDQN
# DQN
policy_net_dqn = DQN(state_size, env.num_uavs, action_size).to(device)
policy_net_dqn.load_state_dict(dqn_model['policy_net_state_dict'])
# Double DQN
policy_net_ddqn = DQN(state_size, env.num_uavs, action_size).to(device)
policy_net_ddqn.load_state_dict(ddqn_model['policy_net_state_dict'])
# Dueling DQN
policy_net_dueldqn = DuelingDQN(state_size, env.num_uavs, action_size).to(device)
policy_net_dueldqn.load_state_dict(dueldqn_model['policy_net_state_dict'])
Out[ ]:
<All keys matched successfully>

Playing Trained Policy¶

We can use evaluate_policy from ./evaluation/evaluate.py to play our trained policy.

In [ ]:
from evaluation.evaluate import evaluate_policy
average_reward_dqn, user_positions_dqn, uav_positions_dqn = evaluate_policy(env, policy_net_dqn, num_episodes=1, device=device)
average_reward_ddqn, user_positions_ddqn, uav_positions_ddqn = evaluate_policy(env, policy_net_ddqn, num_episodes=1, device=device)
average_reward_dueldqn, user_positions_dueldqn, uav_positions_dueldqn = evaluate_policy(env, policy_net_dueldqn, num_episodes=1, device=device)
Episode 0 UAV Performance Metrics:
  Total Distance Traveled: 4999.15
  Average Distance Per Step: 5.00
  Number of Users Covered: 20
  Average Coverage Per Step: 0.02
  Total Time: 1000
  Total Rewards: -234668.89
  Average Reward Per Step: -234.67
Average Reward over 1 episodes: -234668.8890813881
Episode 0 UAV Performance Metrics:
  Total Distance Traveled: 4969.21
  Average Distance Per Step: 4.97
  Number of Users Covered: 19
  Average Coverage Per Step: 0.02
  Total Time: 1000
  Total Rewards: -192344.76
  Average Reward Per Step: -192.34
Average Reward over 1 episodes: -192344.75784965386
Episode 0 UAV Performance Metrics:
  Total Distance Traveled: 3569.58
  Average Distance Per Step: 3.57
  Number of Users Covered: 19
  Average Coverage Per Step: 0.02
  Total Time: 1000
  Total Rewards: -439780.51
  Average Reward Per Step: -439.78
Average Reward over 1 episodes: -439780.5066119824

Animation¶

We can animate the results into a .mp4 file using save_animation from ./visualization/animate.py. ffmpeg is required for rendering the video.

In [ ]:
from visualization.animate import save_animation
#save_animation(user_positions_dqn, uav_positions_dqn, filename='./output/videos/dqn.mp4')
#save_animation(user_positions_ddqn, uav_positions_ddqn, filename='./output/videos/ddqn.mp4')
#save_animation(user_positions_dueldqn, uav_positions_dueldqn, filename='./output/videos/dueldqn.mp4')

Coverage¶

Calculate the AUC of coverage vs threshold radius

Coverage Helper Functions¶

Calculating average (in time step) coverage per radius

In [ ]:
def calculate_coverage_percentage(user_positions_list, uav_positions_list, radii):
    """For each radius, compute the time-averaged percentage of users that lie
    within that radius of their nearest UAV.

    Args:
        user_positions_list: per-time-step list; element[0] is a
            (num_users, 2) position array.
        uav_positions_list: per-time-step list; element[0] is a
            (num_uavs, 2) position array.
        radii: iterable of coverage radii to evaluate.

    Returns:
        Array of coverage percentages, one per radius.
    """
    # Stack positions over time: (T, num_users, 2) and (T, num_uavs, 2)
    user_pos = np.stack([step[0] for step in user_positions_list])
    uav_pos = np.stack([step[0] for step in uav_positions_list])
    num_users = user_pos.shape[1]

    # Pairwise user-UAV distances at every time step: (T, num_users, num_uavs)
    pairwise = np.linalg.norm(
        user_pos[:, :, np.newaxis, :] - uav_pos[:, np.newaxis, :, :],
        axis=3
    )

    # Distance from each user to its nearest UAV: (T, num_users)
    nearest = np.min(pairwise, axis=2)

    # For each radius: count covered users per step, then average over time.
    coverage_counts = np.empty(len(radii))
    for i, radius in enumerate(radii):
        coverage_counts[i] = np.mean(np.sum(nearest <= radius, axis=1))

    # Convert average covered-user counts to percentages.
    return (coverage_counts / num_users) * 100
In [ ]:
# Define the range of coverage radii
radii = np.linspace(0, 100, num=100)

# Calculate the average coverage percentages
coverage_percentages = calculate_coverage_percentage(user_positions_dqn, uav_positions_dqn, radii)

print("Coverage Percentages:", coverage_percentages)
Coverage Percentages: [  0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   5.   5.   5.
  10.  15.  15.  15.  15.  15.  20.  20.  20.  20.  20.  25.  25.  30.
  35.  40.  45.  45.  45.  45.  45.  50.  50.  50.  50.  55.  55.  55.
  60.  60.  60.  60.  60.  60.  65.  65.  65.  70.  70.  70.  70.  75.
  75.  75.  75.  75.  80.  80.  85.  90.  90.  90.  90.  90.  90.  95.
  95. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100. 100.
 100. 100.]

Plotting coverage and calculating AUC

In [ ]:
from sklearn.metrics import auc
def plot_coverage_auc_single(user_positions_dqn, uav_positions_dqn, radii):
    """Plot one model's coverage-percentage-vs-radius curve, with the AUC of
    the curve shown in the legend."""
    # Time-averaged coverage percentage at each radius, and its AUC
    coverage = calculate_coverage_percentage(user_positions_dqn, uav_positions_dqn, radii)
    coverage_auc = auc(radii, coverage)

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=radii,
        y=coverage,
        mode='lines',
        name=f'Coverage Percentage (AUC = {coverage_auc:.2f})',
        line=dict(color='royalblue', width=2)
    ))

    fig.update_layout(
        title='Coverage Percentage vs. Coverage Radius',
        xaxis_title='Coverage Radius',
        yaxis_title='Coverage Percentage',
        width=600,   # Set the width of the plot
        height=600,  # Set the height of the plot
        showlegend=True,
        template='plotly_white',
        legend=dict(x=0.99, y=0.1, xanchor='right', yanchor='bottom')
    )

    fig.show()
In [ ]:
def plot_coverage_auc(user_positions_dqn, uav_positions_dqn, user_positions_ddqn, uav_positions_ddqn, user_positions_duel_dqn, uav_positions_duel_dqn, radii):
    """Overlay the coverage-vs-radius curves of the three models on one
    figure, with each model's AUC shown in its legend entry.

    Args:
        user_positions_*/uav_positions_*: per-time-step position lists as
            consumed by `calculate_coverage_percentage`, one pair per model.
        radii: iterable of coverage radii to evaluate.
    """
    # (legend label, user positions, uav positions, line color) per model —
    # replaces three copy-pasted compute-and-trace blocks.
    models = [
        ('DQN', user_positions_dqn, uav_positions_dqn, 'blue'),
        ('DDQN', user_positions_ddqn, uav_positions_ddqn, 'green'),
        ('Duel DQN', user_positions_duel_dqn, uav_positions_duel_dqn, 'red'),
    ]

    fig = go.Figure()

    for name, user_positions, uav_positions, color in models:
        # Time-averaged coverage percentage at each radius, and its AUC
        coverage = calculate_coverage_percentage(user_positions, uav_positions, radii)
        model_auc = auc(radii, coverage)
        fig.add_trace(go.Scatter(
            x=radii,
            y=coverage,
            mode='lines',
            name=f'{name} (AUC = {model_auc:.2f})',
            line=dict(color=color, width=2)
        ))

    # Update layout
    fig.update_layout(
        title='Coverage Percentage vs. Coverage Radius for Different Models',
        xaxis_title='Coverage Radius',
        yaxis_title='Coverage Percentage',
        width=600,   # Set the width of the plot
        height=600,  # Set the height of the plot
        showlegend=True,
        template='plotly_white',
        legend=dict(x=0.99, y=0.1, xanchor='right', yanchor='bottom')
    )

    # Show plot
    fig.show()

Figure: Coverage AUC¶

In [ ]:
radii = np.linspace(0, 100, num=100)
In [ ]:
plot_coverage_auc(user_positions_dqn, uav_positions_dqn, user_positions_ddqn, uav_positions_ddqn, user_positions_dueldqn, uav_positions_dueldqn, radii)
In [ ]:
num_time_steps = len(user_positions_dqn)
start_index = num_time_steps // 2
plot_coverage_auc(user_positions_dqn[start_index:], uav_positions_dqn[start_index:], user_positions_ddqn[start_index:], uav_positions_ddqn[start_index:], user_positions_dueldqn[start_index:], uav_positions_dueldqn[start_index:], radii)

Best Model¶

In [ ]:
from core.dqn import DQN, DuelingDQN
# Load the tuned "best" checkpoints (trusted local files; torch.load unpickles
# arbitrary objects, so never point this at untrusted data).
dqn_model = torch.load(data_directory+"DQN_best"+'/best_model.pth.tar', map_location=device)
ddqn_model = torch.load(data_directory+"ddDQN_best"+'/best_model.pth.tar', map_location=device)
# NOTE(review): config_dqn is rebuilt here with the tuned hyperparameters but is
# not passed to anything in this cell — confirm whether it is still needed.
config_dqn = Config(network="DQN",
                 num_episodes=50,
                 batch_size=64,
                 gamma=0.75,
                 epsilon_start=1.0,
                 epsilon_end=0.1,
                 epsilon_decay=0.75,
                 target_update=10,
                 checkpoint_interval=10,
                 replay_buffer_capacity=5000,
                 learning_rate=0.00001,
                 evaluation_interval=5)
# NOTE(review): the 'DQN_best' checkpoint is loaded into a DuelingDQN network
# and 'ddDQN_best' into a plain DQN — confirm this cross-pairing is intentional
# (a mismatch would normally fail load_state_dict, so it may be deliberate).
policy_net_dqn = DuelingDQN(state_size, env.num_uavs, action_size).to(device)
policy_net_dqn.load_state_dict(dqn_model['policy_net_state_dict'])
policy_net_ddqn = DQN(state_size, env.num_uavs, action_size).to(device)
policy_net_ddqn.load_state_dict(ddqn_model['policy_net_state_dict'])
# Roll out each policy for one episode, then plot its coverage/AUC curve.
average_reward_dqn, user_positions_dqn, uav_positions_dqn = evaluate_policy(env, policy_net_dqn, num_episodes=1, device=device)
average_reward_ddqn, user_positions_ddqn, uav_positions_ddqn = evaluate_policy(env, policy_net_ddqn, num_episodes=1, device=device)
plot_coverage_auc_single(user_positions_dqn, uav_positions_dqn, radii)
plot_coverage_auc_single(user_positions_ddqn, uav_positions_ddqn, radii)
Episode 0 UAV Performance Metrics:
  Total Distance Traveled: 3479.63
  Average Distance Per Step: 3.48
  Number of Users Covered: 18
  Average Coverage Per Step: 0.02
  Total Time: 1000
  Total Rewards: -388642.79
  Average Reward Per Step: -388.64
Average Reward over 1 episodes: -388642.7925946241
Episode 0 UAV Performance Metrics:
  Total Distance Traveled: 5555.42
  Average Distance Per Step: 5.56
  Number of Users Covered: 20
  Average Coverage Per Step: 0.02
  Total Time: 1000
  Total Rewards: -133419.70
  Average Reward Per Step: -133.42
Average Reward over 1 episodes: -133419.69525320735